www.gusucode.com > wxApp PHP版微信小程序CMS系统 v1.0PHP源码程序 > wxApp PHP版微信小程序CMS系统 v1.0/wxAppCMS_v1.0.0/wxAppCMS_v1.0.0/app/spider/spider.class.php
<?php
/**
 * iCMS - i Content Management System
 * Copyright (c) 2007-2017 iCMSdev.com. All rights reserved.
 *
 * @author icmsdev <master@icmsdev.com>
 * @site https://www.icmsdev.com
 * @licence https://www.icmsdev.com/LICENSE.html
 */

/**
 * Static facade for the spider (crawler) module: loads rule/project/post
 * configuration rows, checks for duplicates in `spider_url`, publishes crawled
 * data through a local "<app>Admincp" method or a remote HTTP endpoint, and
 * keeps `spider_url` / `spider_error` bookkeeping rows up to date.
 *
 * All state is held in public static properties, so one PHP process can only
 * handle one spider job at a time.
 */
class spider{
    // Identifiers of the current job. $rid/$sid/$pid are written into
    // spider_url / spider_error rows below; $poid overrides the project's
    // `poid` in publish(). $cid is not read inside this class.
    public static $cid   = null;
    public static $rid   = null;
    public static $pid   = null;
    public static $sid   = null;
    public static $poid  = null;

    // Defaults used by checker() and errorlog() when no explicit value is passed.
    public static $title = null;
    public static $url   = null;

    // Working mode. Values compared in this class: null (interactive),
    // 'shell', 'WEB@AUTO' and 'DATA@RULE'.
    public static $work  = false;

    // Not referenced inside this class; presumably consumed by the other
    // spider_* classes -- TODO confirm.
    public static $urlslast = null;
    public static $allHtml  = array();
    public static $dataTest = false;
    public static $ruleTest = false;
    public static $content_right_code = false;
    public static $content_error_code = false;

    // Populated by rule() from the stored rule definition.
    public static $referer   = null;
    public static $encoding  = null;
    public static $useragent = null;
    public static $cookie    = null;
    public static $charset   = null;

    // Not referenced inside this class; presumably proxy settings for the
    // fetching code -- TODO confirm.
    public static $curl_proxy  = false;
    public static $proxy_array = array();
    public static $PROXY_URL   = false;

    // Optional hooks used by publish(): 'post' (before saving) and 'save' (after saving).
    public static $callback = array();
    // Additional spider_url row ids that update_spider_url_ids() keeps in sync.
    public static $spider_url_ids = array();

    /**
     * Loads a spider rule row and primes the request-related static properties
     * (user agent, encoding, referer, cookie, charset) from it.
     *
     * @param int|string $id Rule id (cast to int before being used in SQL).
     * @return array The row as an associative array with `rule` unserialized.
     *               When no row exists the array only contains the default
     *               `rule.user_agent`.
     */
    public static function rule($id) {
        $id = (int)$id;
        $rs = iDB::row("SELECT * FROM `#iCMS@__spider_rule` WHERE `id`='$id' LIMIT 1;", ARRAY_A);
        is_array($rs) OR $rs = array();

        // NOTE(review): unserialize() runs on data read from the database. If that
        // column can ever hold attacker-influenced content this is an object
        // injection vector; consider JSON or restricting allowed classes.
        if(!empty($rs['rule'])){
            $rs['rule'] = stripslashes_deep(unserialize($rs['rule']));
        }
        if(empty($rs['rule']['user_agent'])){
            $rs['rule']['user_agent'] = "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)";
        }
        $rule = $rs['rule'];

        spider::$useragent = $rule['user_agent'];
        spider::$encoding  = isset($rule['curl']['encoding']) ? $rule['curl']['encoding'] : null;
        spider::$referer   = isset($rule['curl']['referer'])  ? $rule['curl']['referer']  : null;
        spider::$cookie    = isset($rule['curl']['cookie'])   ? $rule['curl']['cookie']   : null;
        spider::$charset   = isset($rule['charset'])          ? $rule['charset']          : null;
        return $rs;
    }

    /**
     * Loads a spider project row.
     *
     * @param int|string $id Project id (cast to int before being used in SQL).
     * @return array|null Associative row, or whatever iDB::row() yields when nothing matches.
     */
    public static function project($id) {
        $id = (int)$id;
        return iDB::row("SELECT * FROM `#iCMS@__spider_project` WHERE `id`='$id' LIMIT 1;", ARRAY_A);
    }

    /**
     * Loads a spider "post" row and merges its `post` field into $_POST.
     *
     * The `post` field holds one `key=value` pair per line; `name[sub]=value`
     * is stored as $_POST['name']['sub'].
     *
     * @param int|string $id Post-configuration id (cast to int before being used in SQL).
     * @return object|null The row object (read by publish() for ->app and ->fun),
     *                     returned even when its `post` field is empty.
     */
    public static function postArgs($id) {
        $id = (int)$id;
        $postRs = iDB::row("SELECT * FROM `#iCMS@__spider_post` WHERE `id`='$id' LIMIT 1;");
        if ($postRs && !empty($postRs->post)) {
            $lines = array_filter(explode("\n", $postRs->post));
            foreach ($lines as $line) {
                // Limit 2 keeps any '=' inside the value intact.
                $pair = explode("=", $line, 2);
                $pkey = trim($pair[0]);
                $pval = isset($pair[1]) ? trim($pair[1]) : '';
                if ($pkey === '') {
                    continue; // e.g. a stray "\r" left over from CRLF line endings
                }
                if (strpos($pkey, '[')!==false && strpos($pkey, ']')!==false
                    && preg_match('/(.+)\[(.+)\]/', $pkey, $match)
                ) {
                    $_POST[$match[1]][$match[2]] = $pval;
                } else {
                    $_POST[$pkey] = $pval;
                }
            }
        }
        return $postRs;
    }

    /**
     * Checks whether a URL/title has already been published according to the
     * project's `checker` mode (1 = by URL, 2 = by title, 3 = URL and title).
     *
     * The check is skipped when the project has no checker configured or when
     * $_GET['indexid'] is present, unless $work is "DATA@RULE", which always
     * forces a check by URL.
     *
     * @param string|null $work  Working mode; controls how a duplicate is reported.
     * @param int|null    $pid   Project id, defaults to spider::$pid.
     * @param string|null $url   URL to check, defaults to spider::$url.
     * @param string|null $title Title to check, defaults to spider::$title.
     * @return bool|string true when publishing may continue; false for a duplicate
     *                     ('-1' instead of false when $work is "WEB@AUTO").
     */
    public static function checker($work = null,$pid=null,$url=null,$title=null){
        $pid   ===null && $pid   = spider::$pid;
        $url   ===null && $url   = spider::$url;
        $title ===null && $title = spider::$title;

        $project = spider::project($pid);
        $hash    = md5($url); // element id used by the UI callback; computed from the raw URL
        $forced  = ($work=="DATA@RULE");
        $enabled = !empty($project['checker']) && empty($_GET['indexid']);

        if(!$enabled && !$forced){
            return true;
        }

        $title = addslashes($title);
        $url   = addslashes($url);
        $project_checker = $forced ? '1' : $project['checker'];

        switch ($project_checker) {
            case '1': // check by URL
                $sql   = "`url` = '$url'";
                $label = $url.PHP_EOL;
                $msg   = $label.'该网址的文章已经发布过!请检查是否重复';
            break;
            case '2': // check by title
                $sql   = "`title` = '$title'";
                $label = $title.PHP_EOL;
                $msg   = $label.'该标题的文章已经发布过!请检查是否重复';
            break;
            case '3': // check by URL and title
                $sql   = "`url` = '$url' AND `title` = '$title'";
                $label = $title.PHP_EOL.$url;
                $msg   = $label.'该网址和标题的文章已经发布过!请检查是否重复';
            break;
            default:
                // Unknown mode: there is no condition to test, so do not run a
                // malformed query and treat the item as not yet published.
                return true;
        }

        // Optionally narrow the duplicate search to the same project or rule.
        $self = isset($project['self']) ? $project['self'] : null;
        switch ($self) {
            case '1':
                $sql.=" AND `pid`='".(int)$pid."'";
            break;
            case '2':
                $sql.=" AND `rid`='".(int)spider::$rid."'";
            break;
        }

        $checker = iDB::value("SELECT `id` FROM `#iCMS@__spider_url` where $sql AND `publish` in(1,2)");
        if(!$checker){
            return true;
        }

        $work===NULL && iUI::alert($msg, 'js:parent.$("#' . $hash . '").remove();');
        if($work=='shell'){
            echo "\n\033[35m".$msg."\033[0m\n\n";
            return false;
        }
        if($work=="WEB@AUTO"){
            return '-1';
        }
        return false;
    }

    /**
     * Stores the id of the published record on a spider_url row and on every
     * row listed in spider::$spider_url_ids.
     *
     * @param int $suid    spider_url row id.
     * @param int $indexid Id of the published record.
     */
    public static function update_spider_url_indexid($suid,$indexid){
        iDB::update('spider_url',array(
            'indexid' => $indexid,
        ),array('id'=>$suid));
        self::update_spider_url_ids($indexid);
    }

    /**
     * Marks a spider_url row (and the rows in spider::$spider_url_ids) as published.
     *
     * @param int $suid spider_url row id.
     */
    public static function update_spider_url_publish($suid){
        iDB::update('spider_url',array(
            'publish' => '1',
            'pubdate' => time()
        ),array('id'=>$suid));
        self::update_spider_url_ids();
    }

    /**
     * Applies the same update to every row id in spider::$spider_url_ids.
     *
     * @param int $indexid When non-zero only `indexid` is written; otherwise the
     *                     rows are marked as published for the current project.
     */
    public static function update_spider_url_ids($indexid=0){
        if($indexid){
            $data = array(
                'indexid' => $indexid
            );
        }else{
            $data = array(
                'pid'     => spider::$pid,
                'publish' => '1',
                'status'  => '1',
                'pubdate' => time()
            );
        }
        foreach ((array)spider::$spider_url_ids as $suid) {
            iDB::update('spider_url',$data,array('id'=>$suid));
        }
    }

    /**
     * Writes a row to spider_error describing a failure.
     *
     * @param string      $msg  Message to store (slashes are added before insertion).
     * @param string|null $url  Related URL, defaults to spider::$url.
     * @param mixed       $type Value for the `type` column.
     * @param array|null  $a    Extra columns merged over the defaults.
     * @return string The original, unescaped message so callers can echo it.
     */
    public static function errorlog($msg,$url=null,$type=0,$a=null) {
        $data = array(
            'work'    => spider::$work,
            'rid'     => (int)spider::$rid,
            'sid'     => (int)spider::$sid,
            'pid'     => (int)spider::$pid,
            'url'     => ($url?$url:spider::$url),
            'msg'     => addslashes($msg),
            'date'    => date("Y-m-d"),
            'addtime' => time(),
            'type'    => $type
        );
        $a && $data = array_merge($data,(array)$a);
        iDB::insert('spider_error',$data);
        return $msg;
    }

    /**
     * Crawls the current item and publishes it.
     *
     * Steps: crawl -> validate fields -> duplicate check -> load project and
     * post configuration -> ensure a spider_url row exists -> run the optional
     * 'post' hook -> save through a remote URL or a local "<app>Admincp"
     * method -> run the optional 'save' hook.
     *
     * $_POST is overwritten with the crawled data because the save handlers
     * read their input from it.
     *
     * @param string|null $work Working mode; defaults to spider::$work when that is set.
     * @return mixed false on failure, null when a crawled field is null, the
     *               checker() result for duplicates, a hook's return value when it
     *               sets 'callback', the save result (plus 'work') for "shell" /
     *               "WEB@AUTO", otherwise nothing.
     */
    public static function publish($work = null) {
        @set_time_limit(0);
        $_POST = spider_data::crawl();
        // Bail out before $_POST is indexed: a failed crawl is not an array.
        if($_POST===false){
            return false;
        }
        $reurl = isset($_POST['reurl']) ? $_POST['reurl'] : null;
        spider_tools::listItemCache($reurl,'delete');

        foreach ((array)$_POST as $key => $value) {
            if($value===null && $key!='__title__'){
                echo spider::errorlog("publish:$key:null\n",$reurl,"publish:$key:null");
                return null;
            }
        }

        if(spider::$work && $work===null) $work = spider::$work;

        $crawled_title = isset($_POST['title']) ? $_POST['title'] : null;
        $checker = spider::checker($work,spider::$pid,$reurl,$crawled_title);
        if($checker!==true){
            return $checker;
        }

        $project = spider::project(spider::$pid);
        if(!isset($_POST['cid'])){
            $_POST['cid'] = $project['cid'];
        }
        $poid = $project['poid'];
        spider::$poid && $poid = spider::$poid;

        $postArgs = spider::postArgs($poid);
        if(!$postArgs){
            // Without the post configuration neither the target app nor the save
            // method is known, so nothing can be published.
            echo spider::errorlog("发布失败\n",$reurl,'publish.postArgs');
            return false;
        }

        $appid = !empty($_POST['appid']) ? $_POST['appid'] : $postArgs->app;
        $app   = apps::get_app($appid);

        if(!empty($_GET['indexid'])){
            self::get_data_id((int)$_GET['indexid'],$app);
        }

        $title = addslashes($_POST['title']);
        $url   = addslashes($_POST['reurl']);
        // NOTE(review): checker() hashes the raw URL while this hashes the escaped
        // one; they differ for URLs containing quotes or backslashes -- confirm
        // which form the list page uses for element ids.
        $hash  = md5($url);

        if(empty(spider::$sid)){
            $spider_url = iDB::row("SELECT `id`,`publish`,`indexid` FROM `#iCMS@__spider_url` where `url`='$url'",ARRAY_A);
            if(empty($spider_url)){
                $spider_url_data = array(
                    'appid'   => $app['id'],
                    'cid'     => $project['cid'],
                    'rid'     => spider::$rid,
                    'pid'     => spider::$pid,
                    'title'   => $title,
                    'url'     => $url,
                    'hash'    => $hash,
                    'status'  => '1',
                    'addtime' => time(),
                    'publish' => '0',
                    'indexid' => '0',
                    'pubdate' => ''
                );
                $suid = iDB::insert('spider_url',$spider_url_data);
            }else{
                // The URL was seen before: reuse its record so the save updates it.
                if($spider_url['indexid']){
                    self::get_data_id($spider_url['indexid'],$app);
                }
                $suid = $spider_url['id'];
            }
        }else{
            $suid = spider::$sid;
        }

        if (!empty(spider::$callback['post']) && is_callable(spider::$callback['post'])) {
            $_POST = call_user_func_array(spider::$callback['post'],array($_POST));
            if(!empty($_POST['callback'])){
                return $_POST;
            }
        }

        iSecurity::_addslashes($_POST);
        $fun    = $postArgs->fun;
        $return = "1001"; // code that marks a successful save

        if(iFS::checkHttp($fun)){
            // Remote publishing: post the data to the configured URL.
            // NOTE(review): unlike the local branch, a failed remote call is not
            // logged and does not return false.
            $json     = self::postUrl($fun,$_POST);
            $callback = json_decode ($json,true);
            if(isset($callback['code']) && $callback['code']==$return){
                $indexid = $callback['indexid'];
                self::update_spider_url_indexid($suid,$indexid);
                self::update_spider_url_publish($suid);
            }
        }else{
            $obj = $postArgs->app."Admincp";
            $acp = new $obj;
            $acp->callback['code'] = $return;
            /**
             * Primary table callback: store the id of the saved record.
             */
            $acp->callback['primary'] = array(
                array('spider','update_spider_url_indexid'),
                array('suid'=>$suid)
            );
            /**
             * Data table callback: mark the URL as published.
             */
            $acp->callback['data'] = array(
                array('spider','update_spider_url_publish'),
                array('suid'=>$suid)
            );
            $callback = $acp->$fun();
            if(!$callback){
                echo spider::errorlog("发布失败\n",$_POST['reurl'],'publish.fail');
                return false;
            }
        }

        $saved = isset($callback['code']) && $callback['code'] == $return;
        if ($saved && $work===NULL) {
            if (spider::$sid) {
                iUI::success("发布成功!",'js:1');
            } else {
                iUI::success("发布成功!", 'js:parent.$("#' . $hash . '").remove();');
            }
        }

        if (!empty(spider::$callback['save']) && is_callable(spider::$callback['save'])) {
            $ret = call_user_func_array(spider::$callback['save'],array($callback,$_POST));
            if(!empty($ret['callback'])){
                return $ret;
            }
        }

        if($work=="shell"||$work=="WEB@AUTO"){
            $callback['work']=$work;
            return $callback;
        }
    }

    /**
     * Runs the 'primary' and/or 'data' callbacks registered on $obj->callback.
     *
     * Each callback is array(handler, params). For 'primary', $indexid is added to
     * the params when it is truthy, and $obj->callback['return'] is set to the
     * configured code merged with those params.
     *
     * @param object      $obj     Object carrying a public `callback` array.
     * @param int         $indexid Id of the saved record.
     * @param string|null $type    'primary', 'data', or null for both.
     */
    public static function callback($obj,$indexid,$type = null) {
        if ($type === null || $type == 'primary') {
            if (!empty($obj->callback['primary'])) {
                $PCB     = $obj->callback['primary'];
                $handler = $PCB[0];
                $params  = (array) $PCB[1];
                $indexid && $params+= array('indexid' => $indexid);
                $obj->callback['return'] = array(
                    "code" => $obj->callback['code']
                )+$params;
                if (is_callable($handler)) {
                    // array_values(): string keys would be treated as named
                    // arguments on PHP 8 and break handlers whose parameter names
                    // differ; the values are always passed positionally instead.
                    call_user_func_array($handler, array_values($params));
                }
            }
        }
        if ($type === null || $type == 'data') {
            if (!empty($obj->callback['data'])) {
                $DCB     = $obj->callback['data'];
                $handler = $DCB[0];
                $params  = (array) $DCB[1];
                if (is_callable($handler)) {
                    call_user_func_array($handler, array_values($params));
                }
            }
        }
    }

    /**
     * Puts the ids of an existing record into $_POST so that the following
     * save updates it.
     *
     * Uses the app's data-table description when apps_mod provides one;
     * otherwise falls back to the article / article_data layout.
     *
     * @param int   $indexid Id of the existing record.
     * @param array $app     App definition (reads 'table' and 'app').
     */
    public static function get_data_id($indexid,$app) {
        $data_table = apps_mod::get_data_table($app['table']);
        if($data_table){
            if($indexid){
                $data_id_key = $data_table['primary'];
                $union_key   = $data_table['union'];
                $table_name  = $data_table['name'];
                $_POST[$union_key]   = $indexid;
                $_POST[$data_id_key] = iDB::value("SELECT `{$data_id_key}` FROM `#iCMS@__{$table_name}` WHERE `{$union_key}`='".(int)$indexid."'");
            }
        }elseif($app['app']=='article' && $indexid){
            $_POST['article_id'] = $indexid;
            $_POST['data_id']    = iDB::value("SELECT `id` FROM `#iCMS@__article_data` WHERE aid='".(int)$indexid."'");
        }
    }

    /**
     * Sends an HTTP POST with cURL and returns the response body.
     *
     * The referer and user agent of the current request are forwarded when
     * present (they are absent under CLI).
     *
     * @param string       $url  Target URL.
     * @param array|string $data Form fields; arrays are URL-encoded.
     * @return string|false Response body, or false when the request fails
     *                      (CURLOPT_FAILONERROR is enabled).
     */
    public static function postUrl($url, $data) {
        is_array($data) && $data = http_build_query($data);
        $referer   = isset($_SERVER['HTTP_REFERER'])    ? $_SERVER['HTTP_REFERER']    : null;
        $useragent = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : null;
        $options = array(
            CURLOPT_URL            => $url,
            CURLOPT_REFERER        => $referer,
            CURLOPT_USERAGENT      => $useragent,
            CURLOPT_POSTFIELDS     => $data,
            CURLOPT_POST           => 1,
            CURLOPT_TIMEOUT        => 10,
            CURLOPT_CONNECTTIMEOUT => 10,
            CURLOPT_RETURNTRANSFER => 1,
            CURLOPT_FAILONERROR    => 1,
            CURLOPT_HEADER         => false,
            CURLOPT_NOBODY         => false,
            CURLOPT_NOSIGNAL       => true,
            // NOTE(review): TLS certificate and host verification are disabled, so
            // the endpoint's identity is not checked. Kept as-is because enabling
            // it could break existing self-signed setups.
            CURLOPT_SSL_VERIFYPEER => false,
            CURLOPT_SSL_VERIFYHOST => false
        );
        $ch = curl_init();
        curl_setopt_array($ch,$options);
        $responses = curl_exec($ch);
        curl_close ($ch);
        return $responses;
    }
}